#!/usr/bin/perl
#
# Copyright (C) 2013,2014,2019 Olly Betts
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

use strict;
use warnings;

# Usage:
#
# Run strace like so:
#
# strace -y -s0 -efdatasync,fsync,pread64,pwrite64 -o strace.log COMMAND ARGS...
# ./strace-analyse strace.log
#
# Or for strace < 4.7:
#
# strace -s0 -eclose,dup,dup2,dup3,fdatasync,fsync,open,pread64,pwrite64 -o strace.log COMMAND ARGS...
# ./strace-analyse strace.log
#
# Passing -s0 is optional, but doing so reduces the log size by stopping strace
# from writing out any of the contents of blocks being read or written (default
# is to write up to 32 bytes).
#
# You can also pass -r, -i, -t, -tt and/or -ttt - the script can parse out the
# extra information these add, but it currently doesn't do anything with it.

# Track the filename currently corresponding to each fd (unless -y is used).
my @fd = qw(STDIN STDOUT STDERR);
while (<>) {
    my $systime;
    if (s/ <([0-9.]+)>$//) {
	# -T was used.
	$systime = 0 + $1;
    }
    my $time_t;
    if (s/^\s*([0-9.]+) //) {
	my $t = 0 + $1;
	if ($t < 1e9) {
	    # -r was used - relative time since start of previous syscall.
	} else {
	    # -ttt was used - seconds since epoch.
	    $time_t = $t;
	}
    } elsif (s/^[0-9][0-9]:[0-9][0-9]:[0-9][0-9](?:\.[0-9]*)? //) {
	# -t or -tt was used.
    }
    my $addr;
    if (s/^\[([0-9a-f]+)\] //) {
	# -i was used: instruction pointer.
	$addr = $1;
    }
    if (/^pread(?:64)?\((\d+)(?:<(.*?)>)?, .*, (\d+), (\d+)\)/) {
        my ($fd, $path, $blocksize, $off) = ($1, $2, $3, $4);
        my $block = $off / $blocksize;
	$path //= $fd[$fd];
        print "read $block from $path\n";
    } elsif (/^pwrite(?:64)?\((\d+)(?:<(.*?)>)?, .*, (\d+), (\d+)\)/) {
	my ($fd, $path, $blocksize, $off) = ($1, $2, $3, $4);
        my $block = $off / $blocksize;
	$path //= $fd[$fd];
        print "write $block to $path\n";
    } elsif (/^open\("(.*)".* = (\d+)$/) {
	$fd[$2] = $1;
    } elsif (/^openat\(AT_FDCWD, "(.*)".* = (\d+)$/) {
	$fd[$2] = $1;
    } elsif (/^close\((\d+)\)/) {
        $fd[$1] = undef;
    } elsif (/^dup[23]?\((\d+)[^<].* = (\d+)/) {
        $fd[$2] = $fd[$1];
    } elsif (/^f(?:data)?sync\((\d+)(?:<(.*?)>)?\)/) {
	my $path = $2 // $fd[$1];
        print "sync $path\n";
    }
}
